This notebook shows the handover plots for all pathways in the MINOS microsimulation model.
Handover plots are not strictly statistical validation, but more a type of ‘sanity check’ for transitioned variables. A handover plot shows the ‘handover’ from input data to predicted outcomes, and allows us to see at a glance if the predicted outputs are following the same trends seen in the input data.
Source utils for some functions.
source(here::here('minos', 'utils_datain.R'))
source(here::here('minos', 'utils_validation_vis.R'))
source(here::here('minos', 'validation', 'utils.r'))
# Read raw datafiles in
raw.files <- list.files(here::here('data', 'final_US'), pattern='[0-9]{4}_US_cohort.csv', full.names = TRUE)
raw.dat <- do.call(rbind, lapply(raw.files, read.csv))
out.path <- here::here('output', 'default_config/')
base.dat <- read_singular_local_out(out.path, 'baseline', drop.dead = TRUE)
start.year <- 2020
save.path <- here::here("plots", "handovers", 'default')
create.if.not.exists(save.path)
shall.we.save <- FALSE
# first figure out how to plot final_US
# start with hh_income
handover_boxplots(raw.dat, base.dat, 'hh_income')
handover_lineplots(raw.dat, base.dat, "hh_income")
# raw.income2 <- raw.dat %>%
# dplyr::select(pidp, time, hh_income) %>%
# filter(pidp %in% raw.income.wave1)
#
# base.income <- base.income2 <- base.dat %>%
# dplyr::select(pidp, time, hh_income) %>%
# filter(pidp %in% raw.income.wave1)
#
# handover_lineplots(raw.d)
# ## Try a version where final_US is limited to only those present from wave 1
# # onwards because the sample refreshments are messing with the plot
# raw.income.wave1 <- raw.dat$pidp[raw.dat$time == 2009]
#
# raw.income2 <- raw.dat %>%
# dplyr::select(pidp, time, hh_income) %>%
# filter(pidp %in% raw.income.wave1) %>%
# group_by(time) %>%
# summarise(income = mean(hh_income, na.rm = TRUE)) %>%
# mutate(source = 'final_US')
#
# base.income2 <- base.dat %>%
# dplyr::select(pidp, time, hh_income) %>%
# filter(pidp %in% raw.income.wave1) %>%
# group_by(time) %>%
# summarise(income = mean(hh_income, na.rm = TRUE)) %>%
# mutate(source = 'baseline_output')
#
# income2 <- rbind(raw.income2, base.income2)
#
# # Now plot
# ggplot(data = income2, mapping = aes(x = time, y = income, group = source, colour = source)) +
# geom_line() +
# geom_vline(xintercept=start.year, linetype='dotted') +
# labs(title = 'Household Income') +
# xlab('Year') +
# ylab('Income')
#
# if (shall.we.save) {
# ggsave(filename = 'hh_income_wav1.png',
# plot = last_plot(),
# path = save.path)
# }
#
# raw.income.check <- raw.dat %>%
# dplyr::select(pidp, time, hh_income) %>%
# filter(pidp %in% raw.income.wave1)
#
# base.income.check <- base.dat %>%
# dplyr::select(pidp, time, hh_income) %>%
# filter(pidp %in% raw.income.wave1)
#
# rm(raw.income, base.income, income, raw.income.wave1, raw.income2, base.income2,
# raw.income.check, base.income.check)
raw.inc <- raw.dat %>%
select(pidp, age, time, hh_income)
base.inc <- base.dat %>%
select(pidp, age, time, hh_income)
income.spag <- rbind(raw.inc, base.inc)
density_ridges(income.spag, 'hh_income',
save=TRUE,
save.path=save.path)
pidp_sample <- sample(unique(income.spag$pidp), size=nrow(raw.inc)/10, replace=FALSE)
income.spag.sample <- income.spag[which(income.spag$pidp %in% pidp_sample), ]
spaghetti_plot(income.spag.sample, 'hh_income',
save = shall.we.save,
save.path = save.path)
spaghetti_highlight_max_plot(income.spag.sample, 'hh_income',
save = shall.we.save,
save.path = save.path)
rm(raw.inc, base.inc, income.spag)
# sf12 boxplot handovers
handover_boxplots(raw.dat, base.dat, 'SF_12')
handover_lineplots(raw.dat, base.dat, "SF_12")
#
# raw.sf12 <- raw.dat %>%
# dplyr::select(pidp, time, SF_12) %>%
# group_by(time) %>%
# summarise(sf12 = mean(SF_12, na.rm = TRUE)) %>%
# mutate(source = 'final_US')
#
# base.sf12 <- base.dat %>%
# dplyr::select(pidp, time, SF_12) %>%
# group_by(time) %>%
# summarise(sf12 = mean(SF_12, na.rm = TRUE)) %>%
# mutate(source = 'baseline_output')
#
# # merge before plot
# sf12 <- rbind(raw.sf12, base.sf12)
#
# # Now plot
# ggplot(data = sf12, mapping = aes(x = time, y = sf12, group = source, colour = source)) +
# geom_smooth() +
# #geom_line() +
# geom_vline(xintercept=start.year, linetype='dotted') +
# labs(title = 'Mental Wellbeing (SF12)', subtitle = 'Full Sample') +
# xlab('Year') +
# ylab('SF12')
#
# ## Try a version where final_US is limited to only those present from wave 1
# # onwards because the sample refreshments are messing with the plot
# raw.sf12.wave1 <- raw.dat$pidp[raw.dat$time == 2009]
#
# raw.sf12.2 <- raw.dat %>%
# dplyr::select(pidp, time, SF_12) %>%
# filter(pidp %in% raw.sf12.wave1) %>%
# group_by(time) %>%
# summarise(sf12 = mean(SF_12, na.rm = TRUE)) %>%
# mutate(source = 'final_US')
#
# base.sf12.2 <- base.dat %>%
# dplyr::select(pidp, time, SF_12) %>%
# filter(pidp %in% raw.sf12.wave1) %>%
# group_by(time) %>%
# summarise(sf12 = mean(SF_12, na.rm = TRUE)) %>%
# mutate(source = 'baseline_output')
#
# sf12.2 <- rbind(raw.sf12.2, base.sf12.2)
#
# # Now plot
# ggplot(data = sf12.2, mapping = aes(x = time, y = sf12, group = source, colour = source)) +
# geom_line() +
# geom_vline(xintercept=start.year, linetype='dotted') +
# labs(title = 'Mental Wellbeing (SF12 MCS)') +
# xlab('Year') +
# ylab('SF12')
#
# if (shall.we.save) {
# ggsave(filename = 'SF12_wav1.png',
# plot = last_plot(),
# path = save.path)
# }
#
# rm(raw.sf12, base.sf12, sf12, raw.sf12.wave1, raw.sf12.2, base.sf12.2, sf12.2)
raw.sf12 <- raw.dat %>%
select(pidp, age, time, SF_12)
base.sf12 <- base.dat %>%
select(pidp, age, time, SF_12)
sf12.spag <- rbind(raw.sf12, base.sf12)
sf12.spag.sample <- sf12.spag[which(sf12.spag$pidp %in% pidp_sample), ]
spaghetti_plot(sf12.spag.sample, 'SF_12',
save = shall.we.save,
save.path = save.path)
spaghetti_highlight_max_plot(sf12.spag.sample, 'SF_12',
save = shall.we.save,
save.path = save.path)
density_ridges(sf12.spag, "SF_12",
save = T,
save.path = save.path)
rm(raw.sf12, base.sf12, sf12.spag)
TODO: Drop negative values ONLY from waves with no data i.e. not 7,9,11
# nutriton quality line/boxplots.
handover_boxplots(raw.dat, base.dat, 'nutrition_quality')
handover_lineplots(raw.dat, base.dat, 'nutrition_quality')
# Try a version where final_US is limited to only those present from wave 1
# onwards because the sample refreshments are messing with the plot
# raw.wave1 <- raw.dat$pidp[raw.dat$time == 2009]
#
# raw.nut2 <- raw.dat %>%
# dplyr::select(pidp, time, nutrition_quality) %>%
# filter(pidp %in% raw.wave1) %>%
# filter(nutrition_quality >= 0) %>%
# group_by(time) %>%
# summarise(nutrition_quality = mean(nutrition_quality, na.rm = TRUE)) %>%
# mutate(source = 'final_US')
#
# base.nut2 <- base.dat %>%
# dplyr::select(pidp, time, nutrition_quality) %>%
# filter(pidp %in% raw.wave1) %>%
# group_by(time) %>%
# summarise(nutrition_quality = mean(nutrition_quality, na.rm = TRUE)) %>%
# mutate(source = 'baseline_output')
#
# nutrition_quality2 <- rbind(raw.nut2, base.nut2)
#
# # Now plot
# ggplot(data = nutrition_quality2, mapping = aes(x = time, y = nutrition_quality, group = source, colour = source)) +
# geom_line() +
# geom_vline(xintercept=start.year, linetype='dotted') +
# labs(title = 'Nutrition Quality') +
# xlab('Year') +
# ylab('Nutrition Quality')
#
# if (shall.we.save) {
# ggsave(filename = 'nutrition_quality_wav1.png',
# plot = last_plot(),
# path = save.path)
# }
#
# rm(raw.nut, base.nut, nutrition_quality, raw.wave1, raw.nut2, base.nut2,
# nutrition_quality2)
raw.nut <- raw.dat %>%
select(pidp, age, time, nutrition_quality)
base.nut <- base.dat %>%
select(pidp, age, time, nutrition_quality)
nut.spag <- rbind(raw.nut, base.nut)
spaghetti_plot(nut.spag, 'nutrition_quality',
save = shall.we.save,
save.path = save.path)
density_ridges(nut.spag, "nutrition_quality",
save = T,
save.path = save.path)
rm(raw.nut, base.nut, nut.spag)